##############################
##############################
###General Robustness Tests###
##############################
##############################
library(splm)
library(plm)
library(spdep)
library(sf)
library(dplyr)
library(doBy)
library(BAS)
#####Read in the data
###Set working directory
setwd("~/OneDrive - Indiana University/FromGoogle/DeforestationDisease/CodeFinal_12_5_24/")
full.2003.2019<- read.csv("afrogriddiseasejaxa20032019.csv")
summary(full.2003.2019$year)
###Create deforestation indicators
#Contemp
full.2003.2019$deforestation_shock2 <- ifelse(full.2003.2019$jaxa_forest_y_n==1, full.2003.2019$NDVI_mean_anom, 0)
summary(full.2003.2019$deforestation_shock2)


##############################
###Moran's I Geospatial Lag###
##############################
###Subset only relevant variables
full.2003.2019.s <- (full.2003.2019[,c("confirm","deforestation_shock2","log.nl","logged.pop","logged_state","logged_nonstate","p_anom","spei","t_anom","f_month2","f_month3","f_month4","f_month5","f_month6","f_month7","f_month8","f_month9","f_month10","f_month11","f_month12","year","gid","t")])
###Subset out the last year (no data on some controls)
full.2003.2019.s <- subset(full.2003.2019.s, year<=2018)
#Collapse data to create an indicator of missingness for a balanced panel
gid.ind <- summaryBy(. ~ gid, data=full.2003.2019.s, FUN=c("sum"), keep.names = T)
summary(gid.ind)
set.seed(123)
#Remove cells with missigness
gid.ind <- na.omit(gid.ind)
summary(gid.ind)
#Calculate the number of cells without missingness
length(unique(gid.ind$gid))
#Create an indicator to remove cells with NAs for all periods in the maindataset
gid.ind$plac <- 1
#Keep only the two indicators of interest
gid.ind <- gid.ind[,c("plac","gid")]

#Join to the main dataset
full.2003.2019.s<- left_join(full.2003.2019.s,gid.ind)
#Remove all cells with NA at some month in variables of interest
full.2003.2019.s <- subset(full.2003.2019.s, plac==1)
#Remove the plac variable
full.2003.2019.s$plac <- NULL
#Calcuate the length of cells with no NAs 
length(unique(full.2003.2019.s$gid))

####Create a PLM dataset 
afro.plm<- pdata.frame(full.2003.2019.s,index=c("gid","t"))
#Make sure the panel is balanced for times and space
afro.plm.times<- make.pbalanced(afro.plm,balance.type = "shared.times")
afro.plm.balanced<- make.pbalanced(afro.plm.times,balance.type = "shared.individuals")
#Remove the two older datasets to keep space in memory
afro.plm <- NULL
afro.plm.times <- NULL
#Create gid indicators
gid<- data.frame(unique(afro.plm.balanced$gid,))
gid$indiv <- 1:4085
names(gid) <- c("gid","indiv")
##Read in the grid shape file
polygons.prio<- st_read("priogrid_cellshp/priogrid_cell.shp")
#Rearrange by gid
polygons.prio<- polygons.prio %>% dplyr::select(gid)
polygons.prio.slice<- subset(polygons.prio, gid %in% afro.plm.balanced$gid)
#Create grid indicator
rownames(polygons.prio.slice)<- 1:4085
#Redesign the shapefiles as spatial tools
slice.sp<- as(polygons.prio.slice,"Spatial")
slice.nb<- poly2nb(slice.sp,queen=T)
# Row standardized
slice.listw<- nb2listw(slice.nb,style = "W",zero.policy = T)
#Redefine index variables as the first two columns and remove the ol
afro.plm.balanced<- merge(afro.plm.balanced,gid,by="gid")

####Calculate Moran's I
#Create an average grid level measure
af.lim <- summaryBy(confirm~gid, FUN=c("sum"), keep.names = T, na.rm=T, data=afro.plm.balanced)
inc.lag <- lag.listw(slice.listw, af.lim$confirm)
summary(inc.lag)
#Plot 
jpeg("figures3.jpeg", width = 6, height = 6, units = 'in', res = 500)
plot(inc.lag ~ af.lim$confirm, pch=16, asp=1, xlab="Zoonotic outbreaks", ylab="Spatial lag")
M1 <- lm(inc.lag ~ af.lim$confirm)
abline(M1, col="blue")
dev.off()
#Moran's coefficient 
coef(M1)[2] 
mean(af.lim$confirm)
#Robust standard errors
coeftest(M1, vcov = vcovHC, type="HC4")
###The value is 0.01721887 is practically zero, compared with a mean of mean(af.lim$confirm)=0.1554468


#######################################################
###Accounting for Endogeneity and Serial Correlation###
#######################################################

####The models are difference GMMs with twoway effects (hence the month FEs are ommitted), with six-month (technically seven) lags as instruments
####First subsample period: 2014-2018
#Subset data
full.2003.2019.s1 <- subset(full.2003.2019.s, (year>=2014))
#Convert data to pgmm formal
m.pgmm.1 <- pdata.frame(full.2003.2019.s1, index=c("gid", "t"))

#set seed
set.seed(50)
###Run model
gmm.1 <- pgmm(confirm ~ deforestation_shock2+
                log.nl + logged.pop +
                logged_state + logged_nonstate +
                p_anom+spei+t_anom| 
                lag(confirm, 2:8),
              data = m.pgmm.1, effect = "twoway", transformation= "d", model = "onestep")
#summary(gmm.1)
#Clear memory space
full.2003.2019.s1 <- NULL
m.pgmm.1 <- NULL

####Second subsample period: 2008-2013
#Subset data
full.2003.2019.s2 <- subset(full.2003.2019.s, (year>=2008 & year<=2013))
#Convert data to pgmm formal
m.pgmm.2 <- pdata.frame(full.2003.2019.s2, index=c("gid", "t"))

#set seed
set.seed(50)
###Run model
gmm.2 <- pgmm(confirm ~ deforestation_shock2+
                log.nl + logged.pop +
                logged_state + logged_nonstate +
                p_anom+spei+t_anom| 
                lag(confirm, 2:8),
              data = m.pgmm.2, effect = "twoway", transformation= "d", model = "onestep")
#summary(gmm.2)
#Clear memory space
full.2003.2019.s2 <- NULL
m.pgmm.2 <- NULL

####Third subsample period: 2003-2007
#Subset data
full.2003.2019.s3 <- subset(full.2003.2019.s, (year>=2003 & year<=2007))
#Convert data to pgmm formal
m.pgmm.3 <- pdata.frame(full.2003.2019.s3, index=c("gid", "t"))

#set seed
set.seed(50)
###Run model
gmm.3 <- pgmm(confirm ~ deforestation_shock2+
                log.nl + logged.pop +
                logged_state + logged_nonstate +
                p_anom+spei+t_anom| 
                lag(confirm, 2:8),
              data = m.pgmm.3, effect = "twoway", transformation= "d", model = "onestep")
#summary(gmm.3)
#Clear memory space
full.2003.2019.s3 <- NULL
m.pgmm.3 <- NULL

###Export all models to LaTex
stargazer(gmm.1, gmm.2, gmm.3)

##############################
###Baysiean Model Averaging###
##############################
###Subset the relevant variables (this one doesnt require a balanced panel like the other methods)
full.2003.2019.s <- full.2003.2019[,c("confirm","deforestation_shock2","log.nl","logged.pop","logged_state","logged_nonstate","p_anom","spei","t_anom","f_month2","f_month3","f_month4","f_month5","f_month6","f_month7","f_month8","f_month9","f_month10","f_month11","f_month12","gid","month","year","t", "c_gid")]
###Run the BMA model
bma_poisson <- bas.glm(
  confirm ~ deforestation_shock2 + log.nl + logged.pop +
    logged_state + logged_nonstate + p_anom + spei + t_anom +
    f_month2 + f_month3 + f_month4 + f_month5 + f_month6 +
    f_month7 + f_month8 + f_month9 + f_month10 + f_month11 +
    f_month12 + c_gid,
  data = full.2003.2019.s,
  family = poisson(),
  method = "MCMC", # or "BIC" for Bayesian Information Criterion selection
  MCMC.iterations = 10000
)
#Output
summary(bma_poisson)
##measures of weak state capacity (log.nl, logged.pop, and logged_nonstate) are the most importance, deforestation_shock2 is next, important, but not essential, but is still much better than env. conflict, 

